# RDD.intersection() computes the intersection of two RDDs (the elements present in both).
#coding:utf8
from pyspark import SparkContext,SparkConf

if __name__ == '__main__':
    # Demo: print the elements that two RDDs have in common.
    # "local[*]" runs Spark in-process, with one worker thread per logical core.
    conf = SparkConf().setAppName("test").setMaster("local[*]")
    sc = SparkContext(conf=conf)

    try:
        # Employee RDD: (employee id, name) pairs.
        rdd1 = sc.parallelize([(1001, "张三"), (1002, "李四"), (1003, "王五"), (1004, "赵六")])
        # Second RDD holding a subset of the same (employee id, name) pairs.
        rdd2 = sc.parallelize([(1001, "张三"), (1002, "李四")])

        # intersection() compares whole elements (here the full tuple, not just
        # the id) and returns those found in both RDDs without duplicates.
        # The order of the collected result is not guaranteed.
        print(rdd1.intersection(rdd2).collect())
    finally:
        # Release the SparkContext (and its JVM-side resources) even if the
        # job above raises.
        sc.stop()